package com.daervin.svc.parser.sub;

import com.daervin.svc.common.constants.Constants;
import com.daervin.svc.common.dto.NewsDTO;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.Selectable;

import java.util.List;
import java.util.regex.Pattern;

import static com.daervin.svc.common.constants.SourceEnum.HE_XUN;

/**
 * Sub-page parser for Hexun ({@code HE_XUN}) article pages.
 *
 * <p>Reads the title and date from the {@code articleName} block and a short
 * description from the paragraphs of the {@code art_contextBox} block, then
 * publishes the result as a {@link NewsDTO} under
 * {@link Constants#PARSER_RESULT_ITEM}.
 *
 * @author daervin
 * @version 1.0.0
 * @since 1.0.0
 */
public class HexunSubPageParser extends RootSubParser {

    /** Matches named character references such as {@code &nbsp;}; compiled once instead of per paragraph. */
    private static final Pattern HTML_ENTITY = Pattern.compile("\\&[a-zA-Z]{0,9};");

    /** Matches any markup tag; compiled once instead of per paragraph. */
    private static final Pattern HTML_TAG = Pattern.compile("<[^>]*>");

    /** A cleaned paragraph must be strictly longer than this to be used as the description. */
    private static final int MIN_DESC_LENGTH = 20;

    /**
     * Creates the parser and forwards the given URLs to the parent parser.
     *
     * @param listUrl URLs passed unchanged to {@link RootSubParser}
     */
    public HexunSubPageParser(String... listUrl) {
        super(listUrl);
    }

    /**
     * Extracts a news item from the page and stores it as a result field.
     *
     * @param page the downloaded page to parse
     * @return {@code MatchOther.YES} when a news item was extracted and stored in the page;
     *         {@code MatchOther.NO} when the page has no HTML or when the title or date is
     *         missing or blank
     */
    @Override
    public MatchOther processPage(Page page) {
        Html html = page.getHtml();
        if (html == null) {
            return MatchOther.NO;
        }

        String title = html.xpath("//div[@class='articleName']/h1/text()").get();
        String date = html.xpath("//div[@class='articleName']//span[@class='pr20']/text()").get();
        // Validate before touching the article body so rejected pages skip the paragraph scan.
        if (!StringUtils.hasText(title) || !StringUtils.hasText(date)) {
            return MatchOther.NO;
        }

        NewsDTO news = new NewsDTO();
        news.setDesc(extractDesc(html));
        news.setTitle(title);
        news.setBelongDate(date);
        news.setLinks(page.getRequest().getUrl());
        news.setAnnouncer(HE_XUN.announcer);
        news.setCategory(HE_XUN.category);

        page.putField(Constants.PARSER_RESULT_ITEM, news);
        return MatchOther.YES;
    }

    /**
     * Builds the description from the article body paragraphs.
     *
     * <p>Each paragraph is stripped of named entities and tags and then trimmed. The last
     * paragraph whose cleaned text is longer than {@link #MIN_DESC_LENGTH} is returned.
     *
     * @param html the page HTML
     * @return the chosen paragraph text, or an empty string when none qualifies; never {@code null}
     */
    private static String extractDesc(Html html) {
        List<Selectable> paragraphs = html.xpath("//div[@class='art_contextBox']//p").nodes();
        if (CollectionUtils.isEmpty(paragraphs)) {
            return "";
        }

        String desc = "";
        for (Selectable paragraph : paragraphs) {
            String raw = paragraph.get();
            if (raw == null) {
                continue;
            }
            // Entities are removed first, then tags, matching the original replacement order.
            String withoutEntities = HTML_ENTITY.matcher(raw).replaceAll("");
            String text = HTML_TAG.matcher(withoutEntities).replaceAll("").trim();
            // NOTE(review): there is no early exit, so a later qualifying paragraph overrides an
            // earlier one. Confirm whether the first qualifying paragraph was intended instead.
            if (text.length() > MIN_DESC_LENGTH) {
                desc = text;
            }
        }
        return desc;
    }

}